/*-------------------------------------------------------------------------
 *
 * pg_control.h
 *      The system control file "pg_control" is not a heap relation.
 *      However, we define it here so that the format is documented.
 *
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/catalog/pg_control.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef PG_CONTROL_H
#define PG_CONTROL_H

#include "access/xlogdefs.h"
#include "pgtime.h"                /* for pg_time_t */
#include "port/pg_crc32c.h"


/*
 * Version identifier for this pg_control format.  It is stored in
 * ControlFileData.pg_control_version; the CheckPoint struct and the DBState
 * enum below both call for bumping it whenever they change.
 */
#define PG_CONTROL_VERSION    1002

/*
 * Length, in bytes, of the ControlFileData.mock_authentication_nonce array
 * declared further down in this file.
 */
#define MOCK_AUTH_NONCE_LEN        32

/*
 * Body of CheckPoint XLOG records.  This is declared here because we keep
 * a copy of the latest one in pg_control (ControlFileData.checkPointCopy)
 * for possible disaster recovery.
 *
 * Changing this struct requires a PG_CONTROL_VERSION bump.  Note that the
 * fields guarded by __SUPPORT_DISTRIBUTED_TRANSACTION__ are only present
 * when that macro is defined, so the struct layout differs between builds
 * with and without it.
 */
typedef struct CheckPoint
{
    XLogRecPtr    redo;            /* next RecPtr available when we began to
                                 * create CheckPoint (i.e. REDO start point) */
    TimeLineID    ThisTimeLineID; /* current TLI */
    TimeLineID    PrevTimeLineID; /* previous TLI, if this record begins a new
                                 * timeline (equals ThisTimeLineID otherwise) */
    bool        fullPageWrites; /* current full_page_writes */
    uint32        nextXidEpoch;    /* higher-order bits of nextXid */
    TransactionId nextXid;        /* next free XID */
    Oid            nextOid;        /* next free OID */
    MultiXactId nextMulti;        /* next free MultiXactId */
    MultiXactOffset nextMultiOffset;    /* next free MultiXact offset */
    TransactionId oldestXid;    /* cluster-wide minimum datfrozenxid */
    Oid            oldestXidDB;    /* database with minimum datfrozenxid */
    MultiXactId oldestMulti;    /* cluster-wide minimum datminmxid */
    Oid            oldestMultiDB;    /* database with minimum datminmxid */
    pg_time_t    time;            /* time stamp of checkpoint */
    TransactionId oldestCommitTsXid;    /* oldest Xid with valid commit
                                         * timestamp */
    TransactionId newestCommitTsXid;    /* newest Xid with valid commit
                                         * timestamp */

#ifdef __SUPPORT_DISTRIBUTED_TRANSACTION__
    /*
     * Distributed-transaction timestamps.  NOTE(review): presumably the
     * latest commit timestamp and the latest global timestamp (GTS) known
     * at checkpoint time -- confirm against the code that fills them in.
     */
    GlobalTimestamp latestCommitTs;
    GlobalTimestamp    latestGTS;
#endif

    /*
     * Oldest XID still running. This is only needed to initialize hot standby
     * mode from an online checkpoint, so we only bother calculating this for
     * online checkpoints and only when wal_level is replica. Otherwise it's
     * set to InvalidTransactionId.
     */
    TransactionId oldestActiveXid;

} CheckPoint;

/*
 * XLOG info values for XLOG rmgr.
 *
 * Every multiple of 0x10 from 0x00 through 0xF0 is assigned below (0xC0 only
 * when __TBASE__ is defined), so no further value of that form is available.
 */
#define XLOG_CHECKPOINT_SHUTDOWN        0x00
#define XLOG_CHECKPOINT_ONLINE            0x10
#define XLOG_NOOP                        0x20
#define XLOG_NEXTOID                    0x30
#define XLOG_SWITCH                        0x40
#define XLOG_BACKUP_END                    0x50
#define XLOG_PARAMETER_CHANGE            0x60
#define XLOG_RESTORE_POINT                0x70
#define XLOG_FPW_CHANGE                    0x80
#define XLOG_END_OF_RECOVERY            0x90
#define XLOG_FPI_FOR_HINT                0xA0
#define XLOG_FPI                        0xB0
#ifdef __TBASE__
#define XLOG_MVCC                        0xC0
#endif
/* remove 2pc file while 2pc is cleaned */
#define XLOG_CLEAN_2PC_FILE                0xD0
/* NOTE(review): presumably logs creation of a 2pc file -- confirm at the use site */
#define XLOG_CREATE_2PC_FILE            0xE0
/* NOTE(review): presumably logs a timestamp for a 2pc transaction -- confirm at the use site */
#define XLOG_RECORD_2PC_TIMESTAMP        0xF0

/*
 * System status indicator.
 *
 * The numeric value of each state is what ends up in pg_control (see
 * ControlFileData.state), so the values are spelled out explicitly here.
 * If you change this enum, you must bump PG_CONTROL_VERSION.
 */
typedef enum DBState
{
    DB_STARTUP                = 0,
    DB_SHUTDOWNED             = 1,
    DB_SHUTDOWNED_IN_RECOVERY = 2,
    DB_SHUTDOWNING            = 3,
    DB_IN_CRASH_RECOVERY      = 4,
    DB_IN_ARCHIVE_RECOVERY    = 5,
    DB_IN_PRODUCTION          = 6
} DBState;

/*
 * Contents of pg_control.
 *
 * The crc field must remain the last member.  sizeof(ControlFileData) is
 * expected to stay within PG_CONTROL_MAX_SAFE_SIZE (defined later in this
 * file), so be careful when adding fields.
 */

typedef struct ControlFileData
{
    /*
     * Unique system identifier --- to ensure we match up xlog files with the
     * installation that produced them.
     */
    uint64        system_identifier;

    /*
     * Version identifier information.  Keep these fields at the same offset,
     * especially pg_control_version; they won't be real useful if they move
     * around.  (For historical reasons they must be 8 bytes into the file
     * rather than immediately at the front.)
     *
     * pg_control_version identifies the format of pg_control itself.
     * catalog_version_no identifies the format of the system catalogs.
     *
     * There are additional version identifiers in individual files; for
     * example, WAL logs contain per-page magic numbers that can serve as
     * version cues for the WAL log.
     */
    uint32        pg_control_version; /* PG_CONTROL_VERSION */
    uint32        catalog_version_no; /* see catversion.h */

    /*
     * System status data
     */
    DBState        state;            /* see enum above */
    pg_time_t    time;            /* time stamp of last pg_control update */
    XLogRecPtr    checkPoint;        /* last check point record ptr */
    XLogRecPtr    prevCheckPoint; /* previous check point record ptr */

    CheckPoint    checkPointCopy; /* copy of last check point record */

    XLogRecPtr    unloggedLSN;    /* current fake LSN value, for unlogged rels */

    /*
     * The following values determine the minimum point we must recover up to
     * before starting up:
     *
     * minRecoveryPoint is updated to the latest replayed LSN whenever we
     * flush a data change during archive recovery. That guards against
     * starting archive recovery, aborting it, and restarting with an earlier
     * stop location. If we've already flushed data changes from WAL record X
     * to disk, we mustn't start up until we reach X again. Zero when not
     * doing archive recovery.
     *
     * backupStartPoint is the redo pointer of the backup start checkpoint, if
     * we are recovering from an online backup and haven't reached the end of
     * backup yet. It is reset to zero when the end of backup is reached, and
     * we mustn't start up before that. A boolean would suffice otherwise, but
     * we use the redo pointer as a cross-check when we see an end-of-backup
     * record, to make sure the end-of-backup record corresponds to the base
     * backup we're recovering from.
     *
     * backupEndPoint is the backup end location, if we are recovering from an
     * online backup which was taken from the standby and haven't reached the
     * end of backup yet. It is initialized to the minimum recovery point in
     * pg_control which was backed up last. It is reset to zero when the end
     * of backup is reached, and we mustn't start up before that.
     *
     * If backupEndRequired is true, we know for sure that we're restoring
     * from a backup, and must see a backup-end record before we can safely
     * start up. If it's false, but backupStartPoint is set, a backup_label
     * file was found at startup but it may have been a leftover from a stray
     * pg_start_backup() call, not accompanied by pg_stop_backup().
     */
    XLogRecPtr    minRecoveryPoint;
    TimeLineID    minRecoveryPointTLI;
    XLogRecPtr    backupStartPoint;
    XLogRecPtr    backupEndPoint;
    bool        backupEndRequired;

    /*
     * Parameter settings that determine if the WAL can be used for archival
     * or hot standby.
     */
    int            wal_level;
    bool        wal_log_hints;
    int            MaxConnections;
    int            max_worker_processes;
    int            max_prepared_xacts;
    int            max_locks_per_xact;
    bool        track_commit_timestamp;

    /*
     * This data is used to check for hardware-architecture compatibility of
     * the database and the backend executable.  We need not check endianness
     * explicitly, since the pg_control version will surely look wrong to a
     * machine of different endianness, but we do need to worry about MAXALIGN
     * and floating-point format.  (Note: storage layout nominally also
     * depends on SHORTALIGN and INTALIGN, but in practice these are the same
     * on all architectures of interest.)
     *
     * Testing just one double value is not a very bulletproof test for
     * floating-point compatibility, but it will catch most cases.
     */
    uint32        maxAlign;        /* alignment requirement for tuples */
    double        floatFormat;    /* constant 1234567.0 */
#define FLOATFORMAT_VALUE    1234567.0

    /*
     * This data is used to make sure that configuration of this database is
     * compatible with the backend executable.
     */
    uint32        blcksz;            /* data block size for this DB */
    uint32        relseg_size;    /* blocks per segment of large relation */

    uint32        xlog_blcksz;    /* block size within WAL files */
    uint32        xlog_seg_size;    /* size of each WAL segment */

    uint32        nameDataLen;    /* catalog name field width */
    uint32        indexMaxKeys;    /* max number of columns in an index */

    uint32        toast_max_chunk_size;    /* chunk size in TOAST tables */
    uint32        loblksize;        /* chunk size in pg_largeobject */

    /* flags indicating pass-by-value status of various types */
    bool        float4ByVal;    /* float4 pass-by-value? */
    bool        float8ByVal;    /* float8, int8, etc pass-by-value? */

    /* Are data pages protected by checksums? Zero if no checksum version */
    uint32        data_checksum_version;

    /*
     * Random nonce, used in authentication requests that need to proceed
     * based on values that are cluster-unique, like a SASL exchange that
     * failed at an early stage.
     */
    char        mock_authentication_nonce[MOCK_AUTH_NONCE_LEN];

#ifdef __TBASE__
    /*
     * Is an MVCC check still needed when a page is all-visible?
     * NOTE(review): the exact meaning and the valid values of this flag are
     * not visible from this header -- confirm against its users.
     */
    int32       need_mvcc;
    /* Reserved slots; no meaning is assigned to them in this header. */
    int32       reserved_1;
    int32       reserved_2;
    int32       reserved_3;
    int32       reserved_4;
    int32       reserved_5;
#endif

    /* CRC of all above ... MUST BE LAST! */
    pg_crc32c    crc;
} ControlFileData;

/*
 * Maximum safe value of sizeof(ControlFileData).  For reliability's sake,
 * it's critical that pg_control updates be atomic writes.  That generally
 * means the active data can't be more than one disk sector, which is 512
 * bytes on common hardware.  Be very careful about raising this limit.
 */
#define PG_CONTROL_MAX_SAFE_SIZE    512

/*
 * Physical size of the pg_control file.  Note that this is considerably
 * bigger than the actually used size (i.e., sizeof(ControlFileData)).
 * The idea is to keep the physical size constant independent of format
 * changes, so that ReadControlFile will deliver a suitable wrong-version
 * message instead of a read error if it's looking at an incompatible file.
 */
#define PG_CONTROL_FILE_SIZE        8192

#endif                            /* PG_CONTROL_H */
